/*

	This file cleans the export data

*/

* Move to the working directory of this step. The path is quoted so that a
* ${work} root containing blanks is still handed to -cd- as a single argument.
cd "${work}4_reg_variables"

*** PREPARE EXPORT DATA
* Load only the variables used below from the raw export file, restricted to
* years 2002-2015: firm id (jurnr), product code (vare), destination (land),
* year, value (vrd), weight (vgt), amount (mgd) and the unit code/label
* (enhedskode, enhedskodetekst). -clear- discards whatever is in memory.
use jurnr vare land year vrd vgt mgd enhedskode enhedskodetekst using input\export_ipdt ///
	if inrange(year,2002,2015) , clear

*CONCORDANCE OF PRODUCT CODES
* Convert the product code to a number. With -force-, codes containing
* non-numeric characters become missing; those rows are discarded.
destring vare, generate(cn8) force
keep if cn8!=.
drop vare

* Look up the concordance code (cn8plus) for every cn8-year pair, keep only
* rows that find a match, and carry the result forward under the name vnr.
merge m:1 cn8 year using input\cn8_cn8plus_2002_2015, keepusing(cn8plus) keep(match) nogenerate
rename cn8plus vnr
drop cn8

*KEEP SAMPLE DESTINATIONS
* Rows are kept only if their destination appears in output\sample_d. The
* assert() stops the run if any destination in that file has no export row
* (only master-only and matched results are tolerated).
merge m:1 land using output\sample_d, keepusing(land) assert(master match) keep(match) nogenerate

*KEEP FIRM-YEAR SAMPLE
* Rows are kept only if their firm-year pair appears in output\sample_it.
merge m:1 jurnr year using output\sample_it, keepusing(jurnr) keep(match) nogenerate

*DETERMINE WHETHER WEIGHT (VGT) OR AMOUNT (MGD) IS RELEVANT QUANTITY
*1) DUMMY FOR UNIQUE UNIT (ENHEDSKODE) WITHIN PRODUCT
* uniqueunit is 1 on every row of a product that is reported with a single
* unit code, and 0 on every row of a product reported with several codes.
* Within each product the rows are ordered by unit code, so the product has
* exactly one distinct code when the first and the last code coincide. This is
* computed in memory; no intermediate file in temp\ is written or erased.
bysort vnr (enhedskode): gen uniqueunit = (enhedskode[1]==enhedskode[_N])

*2) DETERMINE TOTAL EXPORT VALUE ACROSS OBSERVATIONS FOR NON-MISSING WEIGHTS/AMOUNT
* For every product (vnr), add up the value (vrd) of the rows that report a
* usable amount, and separately of the rows that report a usable weight.
* "Usable" means neither missing nor zero. egen total() treats the missing
* entries of temp as zero, so products without any usable row get a total of 0.
sort vnr

* Value backed by a usable amount (mgd). The zero check mirrors the one used
* for weights below.
gen temp=vrd if mgd!=. & mgd!=0
by vnr: egen vrd_mgd=total(temp)
drop temp

* Value backed by a usable weight (vgt).
gen temp=vrd if vgt!=. & vgt!=0
by vnr: egen vrd_vgt=total(temp)
drop temp

*3) LET AMOUNT BE RELEVANT QUANTITY IF UNIT IS NOT KG, UNIT IS UNIQUE WITHIN PRODUCT, AND VALUE OF OBSERVATIONS WITH NON-MISSING AMOUNTS IS AT LEAST THAT OF OBSERVATIONS WITH NON-MISSING WEIGHTS. OTHERWISE CHOOSE WEIGHT.
* Evaluate the three-part rule once per row. Unit code "00" is the one the
* rule treats as kilograms.
tempvar use_amount
gen byte `use_amount' = enhedskode!="00" & uniqueunit==1 & vrd_mgd>=vrd_vgt

* Quantity and its unit label follow the rule: amount with its own unit text
* where the rule holds, weight labelled "kg" everywhere else.
gen quantity = cond(`use_amount', mgd, vgt)
gen unit = cond(`use_amount', enhedskodetekst, "kg")
drop `use_amount'

*EXCLUDE MISSINGS
* Discard every row whose value or chosen quantity is missing (.) or is not
* strictly positive.
drop if vrd==. | vrd<=0 | quantity==. | quantity<=0

*COLLAPSE
* One row per firm (jurnr), product (vnr), unit, destination (land) and year,
* holding summed value (vrd_exp) and summed quantity.
collapse (sum) vrd_exp=vrd quantity , by(jurnr vnr unit land year) fast

*THRESHOLD
* Copy of the export value that is left missing when the value does not
* exceed 7.5 (NOTE(review): the unit of this cut-off is not visible here).
gen vrd_exp_exsmall = cond(vrd_exp>7.5, vrd_exp, .)

*SAVE FIRM-PRODUCT-DESTINATION-YEAR DATA
compress
save output\export_ipdt, replace